/*

	Mediation Analysis
	
		Applies the mediation approach of Acharya, Blackwell, and Sen to the 2013 MT exam data

			
*/

*---- session setup ----
*start from a clean session
clear all
*uncomment the next line to debug macro expansion
*set trace on

*---- load the analysis dataset ----
use "${dta_dir}/5_Mediation_Analysis_Data.dta", clear

*Group is turned into a string variable; later code compares it to string literals with inlist()
tostring Group, replace

*---- remove leftover .txt files from the output directory ----
*moves the working directory to the folder stored in the global dir and erases every .txt file found there
cd "${dir}"
local stale_txt : dir "${dir}" files "*.txt"
foreach fname of local stale_txt {
	erase "`fname'"
}


*types of mediators we will use
	*each name in this list is also the name of a global, defined further down, holding the mediators of that type
	*the list is used later to build one demediation expression per type
	global mediator_types attend pv_all tv pv_read pv_write pv_sl  

*lists of intermediate confounders and mediators
	
	*intermediate confounders: entered as controls in the first-stage regression of mediation_analysis, but never subtracted from the outcome
	*NOTE(review): the _J-prefixed names look like generated indicator variables (round, period, day of week, enumerator) - confirm where they are created
	global intermediate_confounders n_obs_windows _Jround_2 _Jround_3 _Jperiod_2 _Jperiod_3 _Jday_of_we_2 _Jday_of_we_3 _Jday_of_we_4 _Jday_of_we_5 ///
		_Jenumerato_2 _Jenumerato_3 _Jenumerato_4 _Jenumerato_5 _Jenumerato_6 _Jenumerato_7 _Jenumerato_8
	
*lists of mediator vars
	*this is all index vars from the classroom obs
		
	*teacher actions
	*overall
	global pv_all pct_ta17_minutes_teaching pct_ta16_minutes_inclass_nt pct_ta15_minutes_outside_cls pct_par15_minutes_on_reading pct_paw15_minutes_on_writing pct_pasl7_minutes_on_sl
		*omitted vars
			*NOTE(review): the variables named in the two notes below resemble entries that ARE included in pv_all above under shorter names
			*(pct_ta16_minutes_inclass_nt, pct_ta15_minutes_outside_cls, pct_pasl7_minutes_on_sl) - confirm whether these notes are stale
			*pct_ta16_minutes_inclass_nottea pct_ta15_minutes_outside_class - collinear with pct_ta17_minutes_teaching
			*pct_pasl7_minutes_on_speaklisten - jointly collinear with pct_par15_minutes_on_reading and pct_paw15_minutes_on_writing
		
	global tv tf_keeps_students_focused tf_solid_lesson_plan tf_active_throughout_cls
		*omitted vars
			*NOTE(review): none of the variables named below appears in any mediator list in this file; the notes may describe an earlier specification - confirm
			*ta2_moves_freely - collinear with ta8_remains_at_front
			*ta9_doesnt_callon_individ - collinear with ta3_calls_on_infidividuals
			*ta10_verylittle_participation - collinear with ta4_encourages_participation
			*ta11_ignores_pupils_off_task - collinear with ta5_brings_pupils_back
			*ta_n_negative - collinear with ta_n_positive		
			
	
	*pupil reading
	global pv_read rl_sounds_letters rl_wholelanguage_on_board rl_basics_breakoutsessions rl_lango_sentences_reader rl_primer_paragraphs		

	*pupil writing
	global pv_write wl_pictures_words_stories wl_copy_teacher_text wl_leblango_practice_slates wl_picts_ltrs_paper_hienrgy wl_lango_sentences_handwrtng

	*pupil speaking/listening
	global pv_sl sll_group_only sll_indiv_tchr_and_group
		
	*attendance: presence and different-classroom indicators for rounds r1, r2 and r3
	global attend r1_AT1_Present r1_DiffClassroom_SameSchool r2_AT1_Present r2_DiffClassroom_SameSchool r3_AT1_Present r3_DiffClassroom_SameSchool


	*define list of mediators
	*concatenation of all type-specific lists above; this is the list that gets de-meaned, interacted with treatment and demediated
	*NOTE: the tchr list is commented out, and no global named tchr is defined in this file
	global mediators ${pv_all} ${tv} ${pv_read} ${pv_write} ${pv_sl} ${attend} /*${tchr}*/




			
	
	
*validate that we get back our basic results for this subsample w/expanded obs, once we cluster appropriately

	*main specification: Group fixed effects absorbed, standard errors clustered by school
	areg EL_EGRA_PCA_Index MT_Program CCT_Program BL_EGRA_PCA_Index, a(Group) cluster(School_Code)
	tab Group Study_Arm

	*same specification without groups 13, 14, 7 and 8
	*the if qualifier sits before the comma, which is where Stata's documented command syntax expects it
	areg EL_EGRA_PCA_Index MT_Program CCT_Program BL_EGRA_PCA_Index if !inlist(Group,"13","14","7","8"), a(Group) cluster(School_Code)
	tab Group Study_Arm if !inlist(Group,"13","14","7","8")


	*no fixed effects; difference between the MT and CCT program coefficients
	reg EL_EGRA_PCA_Index MT_Program CCT_Program BL_EGRA_PCA_Index, cluster(School_Code)
	lincom _b[MT_Program] - _b[CCT_Program]

	*point estimate and standard error of that difference on the full sample
	local b_full_sample_MT_CCT_diff = r(estimate)
	local se_full_sample_MT_CCT_diff = r(se)


*restricted sample: drop Study_Arm==0 (presumably the pure control arm - confirm)
*NOTE: observations with a missing Study_Arm satisfy this condition and are kept
keep if Study_Arm!=0

*within the restricted sample the treatment indicator is the MT program dummy
gen Full_Cost_Treatment = MT_Program
reg EL_EGRA_PCA_Index Full_Cost_Treatment BL_EGRA_PCA_Index, cluster(School_Code)		
	
*point estimate and standard error on the restricted sample, for comparison with the lincom result above
local b_rstrctd_sample_MT_CCT_diff = _b[Full_Cost_Treatment]
local se_rstrctd_sample_MT_CCT_diff = _se[Full_Cost_Treatment]

*largely comparable point estimates on this restricted sample 
	
	
*BASIC STRATEGY
	/*
	First, we regress the outcome on the mediator, treatment, and covariates (pretreatment and intermediate) to get an estimate of the demediation function.
	Second, we use the first stage to demediate the outcome and run a regression of this demediated outcome on the treatment and the pretreatment covariates.
	The marginal effect of the treatment in this second stage regression will be the estimate of the ACDE. 
	
	(from Acharya, Blackwell, and Sen, p. 20)
	
	
	*they recommend de-meaning mediators to improve interpretability 
		
	
	
	*/
	

*centre every mediator on the mean of the reduced-cost arm (Study_Arm==1),
*which plays the role of the "control group" here
	*only the variables in the mediator lists are centred
	*intermediate confounders are left as they are - their effects are not pulled out of the outcome

	global vars_to_demean ${mediators}

	foreach medvar in ${vars_to_demean} {

		*mean of the mediator within the reduced-cost arm
		summarize `medvar' if Study_Arm==1

		*shift the variable for all observations by that mean
		replace `medvar' = `medvar' - r(mean)

		*mark the variable label so the transformation is visible in output
		*labvarch is not an official Stata command (presumably from the labutil package - confirm)
		labvarch `medvar', postfix(" (de-meaned)")

	}

	
*per-mediator-type demediation expressions
	*nothing is built at this point: the globals demed_calc_<type> are assembled
	*further below, once the <mediator>_X_T interaction variables exist
	*a loop over the local macro mediator_types used to sit here, but that local
	*is only defined later in this file, so the loop ran over an empty list and
	*never executed; it also left out the treatment interaction terms


*treatment-by-mediator interactions and the overall demediation expression
	*for every mediator m in the global mediators this block
	*   - generates m_X_T = m times Full_Cost_Treatment
	*   - appends m_X_T to the global mediators_X_T
	*   - appends the terms  - _b[m]*m - _b[m_X_T]*m_X_T  to the global demed_calc
	*demed_calc is later pasted after the outcome to subtract the fitted mediator contribution

*both accumulators start out empty
global mediators_X_T
global demed_calc

foreach m of global mediators {

	*name of the interaction variable for this mediator
	local m_int `m'_X_T

	generate `m_int' = `m'*Full_Cost_Treatment

	global mediators_X_T ${mediators_X_T} `m_int'
	global demed_calc ${demed_calc} - _b[`m']*`m' - _b[`m_int']*`m_int'

}

*one demediation expression per mediator family
	*for each family named in the global mediator_types, the global demed_calc_<family>
	*collects the main-effect and interaction terms of the mediators in that family only
local mediator_types ${mediator_types}

foreach family of local mediator_types {

	*reset the accumulator for this family
	global demed_calc_`family'

	*the family name is itself the name of the global listing its mediators
	foreach m of global `family' {
		global demed_calc_`family' ${demed_calc_`family'} - _b[`m']*`m' - _b[`m'_X_T]*`m'_X_T
	}

}



	
*housekeeping before the tables are built: delete every .txt file in the output directory
*the working directory is switched to the folder stored in the global dir
cd "${dir}"
local txt_list : dir "${dir}" files "*.txt"
foreach txt of local txt_list {
	erase "`txt'"
}
	
	
*mediation_analysis
*	Two-stage demediation for one outcome, plus the matching raw regression.
*	  1. regress EL_<outcome> on treatment, mediators, mediator-by-treatment interactions,
*	     intermediate and baseline confounders and BL_<outcome>
*	  2. build the demediated outcome d<outcome> by subtracting the fitted mediator terms (global demed_calc)
*	  3. obtain a randomization-inference p-value with ritest on the demediated outcome
*	  4. regress d<outcome> on treatment and baseline controls and append the result to the "demediated" table
*	  5. run the raw regression on the same sample and append it to the "raw" table
*
*	Options (all required)
*	  outcome(string)     stub of the outcome; EL_<outcome> and BL_<outcome> must exist
*	  outcome_raw(string) stub of the variable EL_<outcome_raw> whose reduced-cost-arm mean and SD are reported
*	  test(string)        label placed in the names of the two output workbooks
*
*	Reads the globals mediators, mediators_X_T, intermediate_confounders, baseline_confounders,
*	demed_calc, dir and year. baseline_confounders, dir and year are not defined in this file
*	(presumably set by a calling do-file - confirm).
*
*	Side effects: leaves the variable d<outcome> in the dataset (a second call with the same
*	outcome will therefore stop at gen), defines the matrix p_RC_vs_FC, and appends to two
*	.xls files through outreg2. Stops with an error if the demediated and raw regressions
*	use a different number of observations.
capture program drop mediation_analysis
program define mediation_analysis
syntax, outcome(string) outcome_raw(string) test(string)
	quietly{

		*first stage: outcome on treatment, mediators, interactions and all confounders
		reg EL_`outcome' Full_Cost_Treatment ${mediators} ${mediators_X_T} ${intermediate_confounders} ${baseline_confounders} BL_`outcome', cluster(School_Code)

		*demediated outcome: the global demed_calc expands to the subtracted mediator terms, evaluated at the first-stage coefficients
		gen d`outcome' = EL_`outcome' ${demed_calc}

		*randomization inference
			local ri_pvals RC_vs_FC
			foreach ri_pval of local ri_pvals{
				preserve
					*NOTE(review): lower-case group and group_RC_vs_FC are not created in this file, and Group is upper-case elsewhere - confirm they exist in the data
					keep Study_Arm group group_`ri_pval' d`outcome' ${baseline_confounders} BL_`outcome' School_Code
					local treat_est
					if "`ri_pval'"=="RC_vs_FC"{
						local treat_est _b[2.Study_Arm]
					}
					*the estimation command carries no if condition: the macro that used to trail it is never defined inside this program and always expanded to nothing
					ritest Study_Arm `treat_est', reps(1000) seed(100453) cluster(School_Code) strata(group_`ri_pval') noanalytics: ///
						areg d`outcome' i.Study_Arm ${baseline_confounders} BL_`outcome', a(group)
					matrix define p_`ri_pval' =r(p)
					local p_`ri_pval' = p_`ri_pval'[1,1]

				restore
			}

		*second stage: demediated outcome on treatment and baseline controls
		reg d`outcome' Full_Cost_Treatment ${baseline_confounders} BL_`outcome', cluster(School_Code)

		*remember the estimation sample so the raw regression can be run on exactly the same observations
		gen samp = e(sample)

		*demediated treatment effect
		local bd`outcome' = _b[Full_Cost_Treatment]

		*other stats for table
			local adjr2 = round(e(r2_a),0.001)
			local N_demed = e(N)

			*format each stat as text: leading zero for values strictly between -1 and 1, at most three decimals
			local stats adjr2
			foreach stat of local stats{
				if ``stat''>0 & ``stat''<1 {
					local `stat' 0``stat''
				}
				if ``stat''>-1 & ``stat''<0 {
					local `stat'=-1*``stat''
					local `stat'=round(``stat'',0.001)
					local `stat' -0``stat''
				}
				*cut the text after the third decimal; the result is stored in the macro named by stat, not in the loop macro itself
				*text without a decimal point is left untouched so whole numbers are not shortened
				if strpos("``stat''",".")>0 {
					local `stat'=substr("``stat''",1,strpos("``stat''",".")+3)
				}

			}

		#delimit;
		outreg2 Full_Cost_Treatment using "${dir}/Mediation Analysis `test' demediated Table ${S_DATE} ${year}.xls",
			keep(Full_Cost_Treatment)
			groupvar(Full_Cost_Treatment)
			addtext(RI p-value, `p_RC_vs_FC',Adjusted R-Squared,`adjr2')
			nocons nor2 noobs excel append dec(3);
		#delimit cr		

	*raw regressions
		*same controls, original outcome, restricted to the second-stage sample
		*the if qualifier is placed before the comma, per Stata's documented command syntax
		reg EL_`outcome' Full_Cost_Treatment ${baseline_confounders} BL_`outcome' if samp==1, cluster(School_Code)

		*raw treatment effect
		local br`outcome' = _b[Full_Cost_Treatment]

		*other stats for table
			local adjr2 = round(e(r2_a),0.001)
			local N_raw = e(N)

			*mean and SD of the raw outcome in the reduced-cost arm (Study_Arm==1), within the estimation sample
			sum EL_`outcome_raw' if Study_Arm==1 & e(sample)
			local mean = round(r(mean),0.001)
			local sd = round(r(sd),0.001)

			*share of the raw effect that disappears once the mediators are removed
			local pct_effect_mediators = round((`br`outcome'' - `bd`outcome'')/`br`outcome'',0.001)

			*same text formatting as for the demediated table
			local stats mean sd adjr2 pct_effect_mediators
			foreach stat of local stats{
				if ``stat''>0 & ``stat''<1 {
					local `stat' 0``stat''
				}
				if ``stat''>-1 & ``stat''<0 {
					local `stat'=-1*``stat''
					local `stat'=round(``stat'',0.001)
					local `stat' -0``stat''
				}
				*cut after the third decimal and store the result in the macro named by stat
				if strpos("``stat''",".")>0 {
					local `stat'=substr("``stat''",1,strpos("``stat''",".")+3)
				}

			}
			*end loop over stats

		*make sure we have identical samples in both regressions
		assert `N_demed'==`N_raw'

		*placeholder text for the spacer rows of the raw table
		local blank="_"

		#delimit;
		outreg2 Full_Cost_Treatment using "${dir}/Mediation Analysis `test' raw Table ${S_DATE} ${year}.xls",
			keep(Full_Cost_Treatment)
			groupvar(Full_Cost_Treatment)
			addtext(Adjusted R-Squared,`adjr2',Blank,`blank',Number of Pupils,`N_raw',Blank,`blank',Pct Reduction through Demediation,`pct_effect_mediators',Blank,`blank',Reduced-Cost Treatment Mean,`mean',Reduced-Cost Treatment SD,`sd')
			nocons nor2 noobs excel append dec(3);
		#delimit cr

		*the sample marker is only needed within one call
		drop samp

	}
	*end block to do quietly

end


*LN scores: appended to the EGRA tables, with the raw LN total supplying the reduced-cost-arm mean and SD
mediation_analysis, outcome(EGRA_LN_Total_control_normed ) outcome_raw(EGRA_LN_Total_raw) test(EGRA)


*PCA index of each test: one pair of tables (demediated and raw) per test
foreach exam in EGRA EGWA OE {

	*the index serves both as the outcome and as the variable whose mean and SD are reported
	mediation_analysis, outcome(`exam'_PCA_Index) outcome_raw(`exam'_PCA_Index) test(`exam')

}
*end loop over tests

	
	
*final housekeeping: delete every .txt file left in the output directory once the tables are written
*the working directory is switched to the folder stored in the global dir
cd "${dir}"
local leftover_txt : dir "${dir}" files "*.txt"
foreach leftover of local leftover_txt {
	erase "`leftover'"
}
